"""
Integration tests for media endpoint with contextual chunking options.

Tests the full flow of media ingestion with contextual features.
"""
import os

import pytest
from unittest.mock import Mock, MagicMock, patch, AsyncMock
import pytest
pytestmark = pytest.mark.unit
from fastapi.testclient import TestClient
from fastapi import UploadFile
import json
import io
import tempfile
from pathlib import Path

from tldw_Server_API.app.main import app as real_app
from tldw_Server_API.app.api.v1.schemas.media_request_models import AddMediaForm


class TestMediaEndpointContextualIntegration:
    """Integration tests for media endpoint with contextual chunking."""

    @pytest.fixture
    def test_client(self):
        """Yield a ``TestClient`` for the real app with user/DB dependencies stubbed.

        The request-user and media-DB dependencies are overridden with mocks
        while the test runs.  The override mapping that was in place before the
        fixture started is reinstated in the ``finally`` clause.
        """
        from tldw_Server_API.app.api.v1.API_Deps.DB_Deps import get_media_db_for_user as dep_get_db
        from tldw_Server_API.app.core.AuthNZ.User_DB_Handling import get_request_user as dep_get_user

        fake_user = Mock(id="test_user")
        fake_db = Mock(db_path="/test/path.db", db_path_str="/test/path.db", client_id="test_client")

        # Snapshot before mutating so teardown restores exactly the prior state.
        saved_overrides = real_app.dependency_overrides.copy()
        real_app.dependency_overrides.update({
            dep_get_user: lambda: fake_user,
            dep_get_db: lambda: fake_db,
        })
        try:
            yield TestClient(real_app)
        finally:
            real_app.dependency_overrides = saved_overrides

    @pytest.fixture
    def auth_headers(self):
        """Return API-key request headers.

        The key comes from the ``SINGLE_USER_API_KEY`` environment variable,
        falling back to a fixed test key, and is supplied under both the
        upper-case and lower-case header spellings.
        """
        # Real app runs in single_user mode by default in tests; use test API key
        key = os.getenv("SINGLE_USER_API_KEY", "test-api-key-12345")
        return dict.fromkeys(("X-API-KEY", "x-api-key"), key)

    @pytest.fixture
    def mock_dependencies(self):
        """Patch the user and media-DB lookups on the media endpoint module.

        Yields a dict with the two patch mocks under ``"user"`` and ``"db"``;
        both patches are undone when the fixture is torn down.
        """
        with patch(
            'tldw_Server_API.app.api.v1.endpoints.media.get_request_user'
        ) as user_patch, patch(
            'tldw_Server_API.app.api.v1.endpoints.media.get_media_db_for_user'
        ) as db_patch:
            user_patch.return_value = Mock(id="test_user")
            db_patch.return_value = Mock(db_path="/test/path.db")
            yield {"user": user_patch, "db": db_patch}

    def test_add_media_with_contextual_chunking_enabled(self, test_client, mock_dependencies, auth_headers):
        """Check that explicit contextual-chunking form fields reach the processor.

        Posts a document URL with contextual chunking enabled plus a model name
        and window size, then inspects the ``chunk_options`` keyword argument
        given to the patched ``process_document_content``.

        If the endpoint never calls the patched processor, the test is reported
        as skipped instead of passing without running a single assertion.
        """
        form_data = {
            "media_type": "document",
            "urls": json.dumps(["https://example.com/test.pdf"]),
            "perform_chunking": "true",
            "chunk_size": "500",
            "chunk_overlap": "100",
            "enable_contextual_chunking": "true",  # Enable contextual
            "contextual_llm_model": "gpt-4",
            "context_window_size": "750"
        }

        with patch('tldw_Server_API.app.api.v1.endpoints.media.process_document_content') as mock_process:
            mock_process.return_value = {"success": True, "media_id": 123}

            response = test_client.post(
                "/api/v1/media/add",
                data=form_data,
                headers=auth_headers
            )

            if not mock_process.called:
                # Nothing to verify; make that visible rather than passing vacuously.
                pytest.skip(
                    "process_document_content was not invoked "
                    f"(HTTP {response.status_code}); chunk options not verified"
                )

            # Options are read from the keyword arguments of the last call.
            chunk_options = mock_process.call_args[1].get('chunk_options', {})

            assert chunk_options.get('enable_contextual_chunking') == True
            assert chunk_options.get('contextual_llm_model') == "gpt-4"
            assert chunk_options.get('context_window_size') == 750

    def test_add_media_with_contextual_chunking_disabled(self, test_client, mock_dependencies, auth_headers):
        """Check that an explicit ``enable_contextual_chunking=false`` is honoured.

        Inspects the ``chunk_options`` keyword argument given to the patched
        ``process_document_content``.  If the patched processor is never
        called, the test is reported as skipped instead of passing with no
        assertions executed.
        """
        form_data = {
            "media_type": "document",
            "urls": json.dumps(["https://example.com/test.pdf"]),
            "perform_chunking": "true",
            "chunk_size": "500",
            "chunk_overlap": "100",
            "enable_contextual_chunking": "false"  # Explicitly disable
        }

        with patch('tldw_Server_API.app.api.v1.endpoints.media.process_document_content') as mock_process:
            mock_process.return_value = {"success": True, "media_id": 123}

            response = test_client.post(
                "/api/v1/media/add",
                data=form_data,
                headers=auth_headers
            )

            if not mock_process.called:
                # Nothing to verify; make that visible rather than passing vacuously.
                pytest.skip(
                    "process_document_content was not invoked "
                    f"(HTTP {response.status_code}); chunk options not verified"
                )

            chunk_options = mock_process.call_args[1].get('chunk_options', {})

            assert chunk_options.get('enable_contextual_chunking') == False

    def test_add_media_contextual_defaults_from_config(self, test_client, mock_dependencies, auth_headers):
        """Check the contextual options seen by the processor when none are sent.

        The form omits every contextual field.  The test then expects the
        ``chunk_options`` passed to the patched ``process_document_content`` to
        carry contextual chunking disabled and no model / window size.

        If the patched processor is never called, the test is reported as
        skipped instead of passing with no assertions executed.
        """
        form_data = {
            "media_type": "document",
            "urls": json.dumps(["https://example.com/test.pdf"]),
            "perform_chunking": "true",
            "chunk_size": "500",
            "chunk_overlap": "100"
            # No contextual options specified - should use defaults
        }

        with patch('tldw_Server_API.app.api.v1.endpoints.media.process_document_content') as mock_process:
            mock_process.return_value = {"success": True, "media_id": 123}

            response = test_client.post(
                "/api/v1/media/add",
                data=form_data,
                headers=auth_headers
            )

            if not mock_process.called:
                # Nothing to verify; make that visible rather than passing vacuously.
                pytest.skip(
                    "process_document_content was not invoked "
                    f"(HTTP {response.status_code}); default chunk options not verified"
                )

            chunk_options = mock_process.call_args[1].get('chunk_options', {})

            # Should have default value (False based on our config)
            assert chunk_options.get('enable_contextual_chunking') == False
            assert chunk_options.get('contextual_llm_model') is None
            assert chunk_options.get('context_window_size') is None

    def test_add_media_file_upload_with_contextual(self, test_client, mock_dependencies):
        """Check that contextual options accompany an uploaded file to the processor.

        Uploads a small in-memory text file with contextual chunking enabled.
        ``_process_uploaded_files`` is patched to report a single saved path and
        ``process_document_content`` is patched so its ``chunk_options`` keyword
        argument can be inspected.

        If the patched processor is never called, the test is reported as
        skipped instead of passing with no assertions executed.
        """
        # Create a test file
        test_content = b"Test document content for contextual chunking"
        test_file = io.BytesIO(test_content)

        files = {
            "files": ("test.txt", test_file, "text/plain")
        }

        form_data = {
            "media_type": "document",
            "perform_chunking": "true",
            "enable_contextual_chunking": "true",
            "contextual_llm_model": "gpt-3.5-turbo"
        }

        with patch(
            'tldw_Server_API.app.api.v1.endpoints.media._process_uploaded_files'
        ) as mock_upload, patch(
            'tldw_Server_API.app.api.v1.endpoints.media.process_document_content'
        ) as mock_process:
            mock_upload.return_value = (["/tmp/test.txt"], [])
            mock_process.return_value = {"success": True, "media_id": 124}

            # NOTE(review): this test sends a Bearer token rather than the API-key
            # headers used by most sibling tests -- confirm that is intended.
            response = test_client.post(
                "/api/v1/media/add",
                data=form_data,
                files=files,
                headers={"Authorization": "Bearer test_token"}
            )

            if not mock_process.called:
                # Nothing to verify; make that visible rather than passing vacuously.
                pytest.skip(
                    "process_document_content was not invoked "
                    f"(HTTP {response.status_code}); chunk options not verified"
                )

            # Verify contextual options were passed
            chunk_options = mock_process.call_args[1].get('chunk_options', {})

            assert chunk_options.get('enable_contextual_chunking') == True
            assert chunk_options.get('contextual_llm_model') == "gpt-3.5-turbo"

    @pytest.mark.parametrize("media_type,expected_method", [
        ("document", "sentences"),
        ("pdf", "sentences"),
        ("ebook", "ebook_chapters"),
        ("video", "sentences"),
        ("audio", "sentences")
    ])
    def test_contextual_chunking_with_different_media_types(
        self,
        test_client,
        mock_dependencies,
        auth_headers,
        media_type,
        expected_method
    ):
        """Check that contextual chunking is forwarded for each media type.

        For every parametrized ``media_type`` the matching processing function
        on the media endpoint module is patched and the ``chunk_options``
        keyword argument it receives is inspected.  The chunking ``method`` is
        compared with ``expected_method`` for ebooks only.

        If the patched processor is never called, the test is reported as
        skipped instead of passing with no assertions executed.
        """
        form_data = {
            "media_type": media_type,
            "urls": json.dumps([f"https://example.com/test.{media_type}"]),
            "perform_chunking": "true",
            "enable_contextual_chunking": "true"
        }

        # Mock the appropriate processing function based on media type
        process_func = {
            "document": "process_document_content",
            "pdf": "process_pdf_task",
            "ebook": "process_epub",
            "video": "process_videos",
            "audio": "process_audio_files"
        }.get(media_type, "process_document_content")

        with patch(f'tldw_Server_API.app.api.v1.endpoints.media.{process_func}') as mock_process:
            mock_process.return_value = {"success": True, "media_id": 125}

            response = test_client.post(
                "/api/v1/media/add",
                data=form_data,
                headers=auth_headers
            )

            if not mock_process.called:
                # Nothing to verify; make that visible rather than passing vacuously.
                pytest.skip(
                    f"{process_func} was not invoked for media_type={media_type!r} "
                    f"(HTTP {response.status_code}); chunk options not verified"
                )

            chunk_options = mock_process.call_args[1].get('chunk_options', {})

            assert chunk_options.get('enable_contextual_chunking') == True
            # Check that the method is appropriate for media type.
            # NOTE(review): only the ebook method is asserted; confirm whether the
            # endpoint sets 'method' for the other types before tightening this.
            if media_type == "ebook":
                assert chunk_options.get('method') == expected_method

    def test_batch_media_with_contextual_chunking(self, test_client, mock_dependencies, auth_headers):
        """Submit several URLs at once and check every processor call's options.

        ``smart_download`` is replaced with a stub that writes a placeholder
        file into the supplied temp directory, so no network access is needed.
        The patched ``process_document_content`` must be called at least once,
        and each recorded call must carry the contextual chunking settings.
        """
        def _fake_download(url, temp_dir, allowed_extensions=None):
            # Stand-in for smart_download: create a local file named after the URL.
            filename = Path(url).name or "test.pdf"
            target = Path(str(temp_dir)) / filename
            target.write_text("dummy content")
            return target

        document_urls = [
            "https://example.com/doc1.pdf",
            "https://example.com/doc2.pdf",
            "https://example.com/doc3.pdf"
        ]
        form_data = {
            "media_type": "document",
            "urls": json.dumps(document_urls),
            "perform_chunking": True,
            "enable_contextual_chunking": True,
            "contextual_llm_model": "claude-opus-4.1"
        }

        with patch(
            'tldw_Server_API.app.api.v1.endpoints.media.process_document_content'
        ) as mock_process, patch(
            'tldw_Server_API.app.api.v1.endpoints.media.smart_download',
            side_effect=_fake_download
        ):
            mock_process.return_value = {"success": True, "media_id": 126}
            test_client.post(
                "/api/v1/media/add",
                data=form_data,
                headers=auth_headers
            )

        # At least one URL must have reached the processor
        assert mock_process.call_count >= 1

        # Each call should have contextual options
        for recorded_call in mock_process.call_args_list:
            options = recorded_call[1].get('chunk_options', {})
            assert options.get('enable_contextual_chunking') == True
            assert options.get('contextual_llm_model') == "claude-opus-4.1"

    def test_contextual_options_validation(self, test_client, mock_dependencies, auth_headers):
        """Check that out-of-range ``context_window_size`` values yield HTTP 422.

        Two requests are sent, one with a value this test treats as too small
        and one it treats as too large (the limits assumed here are 100 and
        2000).  Requests use the ``auth_headers`` fixture, as most of the
        other tests in this class do.
        """
        base_form = {
            "media_type": "document",
            "urls": json.dumps(["https://example.com/test.pdf"]),
            "perform_chunking": "true",
            "enable_contextual_chunking": "true",
        }
        invalid_sizes = (
            ("50", "below minimum of 100"),
            ("3000", "above maximum of 2000"),
        )

        for size, reason in invalid_sizes:
            response = test_client.post(
                "/api/v1/media/add",
                data={**base_form, "context_window_size": size},
                headers=auth_headers
            )

            # Should get validation error
            assert response.status_code == 422, (
                f"context_window_size={size} ({reason}) returned "
                f"HTTP {response.status_code}, expected 422"
            )

    def test_contextual_chunking_preserves_other_options(self, test_client, mock_dependencies, auth_headers):
        """Check that contextual options coexist with the other chunking options.

        Sends a full set of chunking fields together with the contextual ones
        and verifies that each appears, under the key names asserted below, in
        the ``chunk_options`` keyword argument given to the patched
        ``process_document_content``.  Requests use the ``auth_headers``
        fixture, as most of the other tests in this class do.

        If the patched processor is never called, the test is reported as
        skipped instead of passing with no assertions executed.
        """
        form_data = {
            "media_type": "document",
            "urls": json.dumps(["https://example.com/test.pdf"]),
            "perform_chunking": "true",
            "chunk_method": "semantic",
            "chunk_size": "1000",
            "chunk_overlap": "200",
            "use_adaptive_chunking": "true",
            "use_multi_level_chunking": "true",
            "chunk_language": "en",
            "enable_contextual_chunking": "true",
            "contextual_llm_model": "gpt-4"
        }

        with patch('tldw_Server_API.app.api.v1.endpoints.media.process_document_content') as mock_process:
            mock_process.return_value = {"success": True, "media_id": 127}

            response = test_client.post(
                "/api/v1/media/add",
                data=form_data,
                headers=auth_headers
            )

            if not mock_process.called:
                # Nothing to verify; make that visible rather than passing vacuously.
                pytest.skip(
                    "process_document_content was not invoked "
                    f"(HTTP {response.status_code}); chunk options not verified"
                )

            chunk_options = mock_process.call_args[1].get('chunk_options', {})

            # All options should be preserved
            assert chunk_options.get('method') == "semantic"
            assert chunk_options.get('max_size') == 1000
            assert chunk_options.get('overlap') == 200
            assert chunk_options.get('adaptive') == True
            assert chunk_options.get('multi_level') == True
            assert chunk_options.get('language') == "en"
            assert chunk_options.get('enable_contextual_chunking') == True
            assert chunk_options.get('contextual_llm_model') == "gpt-4"
